home *** CD-ROM | disk | FTP | other *** search
/ Mac Easy 2010 May / Mac Life Ubuntu.iso / casper / filesystem.squashfs / usr / share / doc / popularity-contest / examples / popanal.py < prev    next >
Encoding:
Python Source  |  2008-11-04  |  7.0 KB  |  279 lines

  1. #!/usr/bin/python 
  2. #
  3. # Read Debian popularity-contest submission data on stdin and produce
  4. # some statistics about it.
  5. #
  6. import sys, string, time, glob, gzip
  7.  
  8. def ewrite(s):
  9.     sys.stderr.write("%s\n" % s)
  10.  
  11.  
  12. class Vote:
  13.     yes = 0
  14.     old_unused = 0
  15.     too_recent = 0
  16.     empty_package = 0
  17.  
  18.     def vote_for(vote, package, entry):
  19.     now = time.time()
  20.     if entry.atime == 0:  # no atime: empty package
  21.         vote.empty_package = vote.empty_package + 1
  22.     elif now - entry.atime > 30 * 24*3600:  # 30 days since last use: old
  23.         vote.old_unused = vote.old_unused + 1
  24.     elif now - entry.ctime < 30 * 24* 3600 \
  25.       and entry.atime - entry.ctime < 24*3600:  # upgraded too recently
  26.         vote.too_recent = vote.too_recent + 1
  27.     else:            # otherwise, vote for this package
  28.         vote.yes = vote.yes + 1
  29.  
  30. UNKNOWN = '**UNKNOWN**'
  31.  
  32. votelist = {}
  33. sectlist = { UNKNOWN : [] }
  34. deplist = {}
  35. provlist = {}
  36. complained = {}
  37. release_list = {}
  38. arch_list = {}
  39. subcount = 0
  40.  
  41. mirrorbase = "/org/ftp.debian.org/ftp"
  42.  
  43. def parse_depends(depline):
  44.     l = []
  45.     split = string.split(depline, ',')
  46.     for d in split:
  47.     x = string.split(d)
  48.     if (x):
  49.         l.append(x[0])
  50.     return l
  51.  
  52.  
  53. def read_depends(filename):
  54.     file = gzip.open(filename, 'r')
  55.     package = None
  56.  
  57.     while 1:
  58.     line = file.readline()
  59.     if line:
  60.         if line[0]==' ' or line[0]=='\t': continue  # continuation
  61.         split = string.split(line, ':')
  62.  
  63.     if not line or split[0]=='Package':
  64.         if package and (len(dep) > 0 or len(prov) > 0):
  65.         deplist[package] = dep
  66.         for d in prov:
  67.             if not provlist.has_key(d):
  68.             provlist[d] = []
  69.             provlist[d].append(package)
  70.         if package:
  71.         if not sectlist.has_key(section):
  72.             sectlist[section] = []
  73.         if not votelist.has_key(package):
  74.             sectlist[section].append(package)
  75.         votelist[package] = Vote()
  76.         ewrite(package)
  77.         package = None
  78.         if line:
  79.         package = string.strip(split[1])
  80.         section = UNKNOWN
  81.         dep = []
  82.         prov = []
  83.     elif split[0]=='Section':
  84.         section = string.strip(split[1])
  85.     elif split[0]=='Depends' or split[0]=='Requires':
  86.         dep = dep + parse_depends(split[1])
  87.     elif split[0]=='Provides':
  88.         prov = parse_depends(split[1])
  89.         
  90.     if not line: break
  91.     
  92.  
  93. class Entry:
  94.     atime = 0;
  95.     ctime = 0;
  96.     mru_file = '';
  97.  
  98.     def __init__(self, atime, ctime, mru_file):
  99.     try:
  100.         self.atime = long(atime)
  101.         self.ctime = long(ctime)
  102.     except:
  103.         self.atime = self.ctime = 0
  104.     self.mru_file = mru_file
  105.  
  106.  
  107. class Submission:
  108.     # format: {package: [atime, ctime, mru_file]}
  109.     entries = {}
  110.  
  111.     start_date = 0
  112.  
  113.     arch = "unknown"
  114.     release= "unknown"
  115.  
  116.     # initialize a new entry with known data
  117.     def __init__(self, version, owner_id, date):
  118.     self.entries = {}
  119.     self.start_date = long(date)
  120.     ewrite('%s:\n\tSTART: %s' % (owner_id, time.ctime(long(date))))
  121.  
  122.     # process a line of input from the survey
  123.     def addinfo(self, split):
  124.     if len(split) < 4:
  125.         ewrite('Invalid input line: ' + `split`)
  126.         return
  127.     self.entries[split[2]] = Entry(split[0], split[1], split[3])
  128.  
  129.     # update the atime of dependency to that of dependant, if newer
  130.     def update_atime(self, dependency, dependant):
  131.     if not self.entries.has_key(dependency): return
  132.     e = self.entries[dependency]
  133.     f = self.entries[dependant]
  134.     if e.atime < f.atime:
  135.         e.atime = f.atime
  136.         e.ctime = f.ctime
  137.  
  138.     # we found the last line of the survey: finish it
  139.     def done(self, date):
  140.     ewrite('\t STOP: after %d seconds, %d packages'
  141.            % (date - self.start_date, len(self.entries)))
  142.     for package in self.entries.keys():
  143.         e = self.entries[package]
  144.         if deplist.has_key(package):
  145.         for d in deplist[package]:
  146.             self.update_atime(d, package)
  147.             if provlist.has_key(d):
  148.             for dd in provlist[d]:
  149.                 self.update_atime(dd, package)
  150.     for package in self.entries.keys():
  151.         if not votelist.has_key(package):
  152.         if not complained.has_key(package):
  153.             ewrite(('Warning: package %s neither in '
  154.                 + 'stable nor unstable')  % package)
  155.             complained[package] = 1
  156.         votelist[package] = Vote()
  157.         sectlist[UNKNOWN].append(package)
  158.         votelist[package].vote_for(package, self.entries[package])
  159.  
  160.         if not release_list.has_key(self.release):
  161.             release_list[self.release] = 1
  162.         else:
  163.             release_list[self.release] = release_list[self.release] + 1
  164.  
  165.         if not arch_list.has_key(self.arch):
  166.             arch_list[self.arch] = 1
  167.         else:
  168.             arch_list[self.arch] = arch_list[self.arch] + 1
  169.  
  170. def headersplit(pairs):
  171.     header = {}
  172.     for d in pairs:
  173.     list = string.split(d, ':')
  174.     try:
  175.         key, value = list
  176.         header[key] = value
  177.     except:
  178.         pass
  179.     return header
  180.  
  181.  
  182. def read_submissions(stream):
  183.     global subcount
  184.     e = None
  185.     while 1:
  186.     line = stream.readline()
  187.     if not line: break
  188.  
  189.     split = string.split(line)
  190.     if not split: continue
  191.  
  192.     if split[0]=='POPULARITY-CONTEST-0':
  193.         header = headersplit(split[1:])
  194.  
  195.         if not header.has_key('ID') or not header.has_key('TIME'):
  196.         ewrite('Invalid header: ' + split)
  197.         continue
  198.  
  199.         subcount = subcount + 1
  200.         ewrite('#%s' % subcount)
  201.         e = None
  202.         try:
  203.         e = Submission(0, header['ID'], header['TIME'])
  204.         except:
  205.         ewrite('Invalid date: ' + header['TIME'] + ' for ID ' + header['ID'])
  206.         continue
  207.  
  208.             if header.has_key('POPCONVER'):
  209.         if header['POPCONVER']=='':
  210.                 e.release = 'unknown'
  211.                 elif header['POPCONVER']=='1.27.bill.1':
  212.                     e.release = '1.27'
  213.         else:
  214.                 e.release = header['POPCONVER']
  215.     
  216.             if header.has_key('ARCH'):
  217.             if header['ARCH']=='x86_64':
  218.                     e.arch = 'amd64'
  219.             elif header['ARCH']=='i386-gnu':
  220.                     e.arch = 'hurd-i386'
  221.         elif header['ARCH']=='':
  222.                     e.arch = 'unknown'
  223.         else:
  224.                     e.arch = header['ARCH']
  225.  
  226.     elif split[0]=='END-POPULARITY-CONTEST-0' and e != None:
  227.         header = headersplit(split[1:])
  228.         if header.has_key('TIME'):
  229.         try:
  230.           date = long(header['TIME'])
  231.         except: 
  232.           ewrite('Invalid date: ' + header['TIME'])
  233.           continue
  234.         e.done(date)
  235.         e = None
  236.  
  237.     elif e != None:
  238.         e.addinfo(split)
  239.     # end of while loop
  240.     ewrite('Processed %d submissions.' % subcount)
  241.  
  242.  
  243. # main program
  244.  
  245. for d in glob.glob('%s/dists/stable/*/binary-i386/Packages.gz' % mirrorbase):
  246.     read_depends(d)
  247. for d in glob.glob('%s/dists/unstable/*/binary-i386/Packages.gz' % mirrorbase):
  248.     read_depends(d)
  249. read_submissions(sys.stdin)
  250.  
  251. def nicename(s):
  252.     new_s = ''
  253.     for c in s:
  254.         if c == '/':
  255.             new_s = new_s + ',';
  256.     elif c in string.letters or c in string.digits or c=='-':
  257.         new_s = new_s + c
  258.     else:
  259.         new_s = new_s + '.'
  260.     return new_s
  261.  
  262. # dump the results
  263. out = open('results', 'w')
  264. out.write("Submissions: %8d\n" % subcount)  
  265. for release in release_list.keys():
  266.     out.write("Release: %-30s %5d\n"
  267.                   % (release, release_list[release]))
  268.  
  269. for arch in arch_list.keys():
  270.     out.write("Architecture: %-30s %5d\n"
  271.                   % (arch, arch_list[arch]))
  272. for section in sectlist.keys():
  273.     for package in sectlist[section]:
  274.     fv = votelist[package]
  275.     out.write("Package: %-30s %5d %5d %5d %5d\n"
  276.           % (package, fv.yes, fv.old_unused,
  277.              fv.too_recent, fv.empty_package))
  278.  
  279.